library(sf)
library(raster)
library(tidyverse)
library(magrittr)
# Read crime data to tibble
Crime_2019 <- read_csv("data/crime-2019.csv",
col_types = list(ID = col_integer(),
`Case Number` = col_character(),
Date = col_datetime(format = "%m/%d/%Y %H:%M:%S %p"),
Block = col_factor(),
IUCR = col_factor(),
`Primary Type` = col_factor(),
Description = col_factor(),
`Location Description` = col_factor(),
Arrest = col_logical(),
Domestic = col_logical(),
Beat = col_factor(),
District = col_factor(),
Ward = col_integer(),
`Community Area` = col_integer(),
`X Coordinate` = col_double(),
`Y Coordinate` = col_double(),
Year = col_integer(),
`Updated On` = col_datetime(format = "%m/%d/%Y %H:%M:%S %p"),
Latitude = col_double(),
Longitude = col_double(),
Location = col_character()))
Crime_NA <- na.omit(Crime_2019) %>% tibble()
Crime_2019
## # A tibble: 260,430 x 22
## ID `Case Number` Date Block IUCR `Primary Type`
## <int> <chr> <dttm> <fct> <fct> <fct>
## 1 1.22e7 JD444016 2019-10-03 14:00:00 003X~ 1150 DECEPTIVE PRA~
## 2 1.22e7 JD443965 2019-12-01 00:01:00 038X~ 1752 OFFENSE INVOL~
## 3 1.22e7 JD442995 2019-11-27 08:00:00 055X~ 5002 OTHER OFFENSE
## 4 1.20e7 JD194800 2019-05-10 11:00:00 043X~ 1195 DECEPTIVE PRA~
## 5 1.19e7 JC532990 2019-12-03 13:08:00 027X~ 041A BATTERY
## 6 1.22e7 JD441586 2019-07-22 00:01:00 071X~ 1150 DECEPTIVE PRA~
## 7 1.22e7 JD399576 2019-12-07 12:33:00 066X~ 1544 SEX OFFENSE
## 8 1.19e7 JC535200 2019-12-04 06:00:00 032X~ 1563 SEX OFFENSE
## 9 1.18e7 JC358770 2019-07-21 18:35:00 041X~ 041A BATTERY
## 10 1.22e7 JD439375 2019-11-23 00:00:00 050X~ 0265 CRIMINAL SEXU~
## # ... with 260,420 more rows, and 16 more variables: Description <fct>,
## # `Location Description` <fct>, Arrest <lgl>, Domestic <lgl>, Beat <fct>,
## # District <fct>, Ward <int>, `Community Area` <int>, `FBI Code` <chr>, `X
## # Coordinate` <dbl>, `Y Coordinate` <dbl>, Year <int>, `Updated On` <dttm>,
## # Latitude <dbl>, Longitude <dbl>, Location <chr>
colnames(Crime_NA) %<>% strsplit(split = " ") %>% lapply(paste, collapse = "")
head(Crime_NA)
## # A tibble: 6 x 22
## ID CaseNumber Date Block IUCR PrimaryType Description
## <int> <chr> <dttm> <fct> <fct> <fct> <fct>
## 1 1.20e7 JD194800 2019-05-10 11:00:00 043X~ 1195 DECEPTIVE ~ FINANCIAL ~
## 2 1.19e7 JC532990 2019-12-03 13:08:00 027X~ 041A BATTERY AGGRAVATED~
## 3 1.22e7 JD399576 2019-12-07 12:33:00 066X~ 1544 SEX OFFENSE SEXUAL EXP~
## 4 1.19e7 JC535200 2019-12-04 06:00:00 032X~ 1563 SEX OFFENSE CRIMINAL S~
## 5 1.18e7 JC358770 2019-07-21 18:35:00 041X~ 041A BATTERY AGGRAVATED~
## 6 1.21e7 JD272123 2019-01-17 00:00:00 075X~ 1750 OFFENSE IN~ CHILD ABUSE
## # ... with 15 more variables: LocationDescription <fct>, Arrest <lgl>,
## # Domestic <lgl>, Beat <fct>, District <fct>, Ward <int>,
## # CommunityArea <int>, FBICode <chr>, XCoordinate <dbl>, YCoordinate <dbl>,
## # Year <int>, UpdatedOn <dttm>, Latitude <dbl>, Longitude <dbl>,
## # Location <chr>
# Load shapefile containing community areas
com_bounds <- st_read("data/community_areas.shp", quiet = TRUE)
com_bounds %<>% arrange(as.integer(area_numbe))
print(com_bounds, n = 3)
## Simple feature collection with 77 features and 9 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -87.94011 ymin: 41.64454 xmax: -87.52414 ymax: 42.02304
## geographic CRS: WGS84(DD)
## First 3 features:
## area area_num_1 area_numbe comarea comarea_id community perimeter
## 1 0 1 1 0 0 ROGERS PARK 0
## 2 0 2 2 0 0 WEST RIDGE 0
## 3 0 3 3 0 0 UPTOWN 0
## shape_area shape_len geometry
## 1 51259902 34052.40 MULTIPOLYGON (((-87.65456 4...
## 2 98429095 43020.69 MULTIPOLYGON (((-87.68465 4...
## 3 65095643 46972.79 MULTIPOLYGON (((-87.64102 4...
plot(st_geometry(com_bounds))
Area numbers in com_bounds correspond to CommunityArea in Crime_NA, making plotting maps easier.
Crime_summary <- Crime_NA %>% select(CommunityArea) %>% table() %>% as.data.frame()
colnames(Crime_summary) <- c("CommunityArea", "Freq")
head(Crime_summary)
## CommunityArea Freq
## 1 1 3991
## 2 2 3416
## 3 3 3283
## 4 4 1761
## 5 5 1238
## 6 6 5857
com_bounds$crime_summary <- Crime_summary$Freq
plot(com_bounds["crime_summary"], main = "Crime frequency by community area")
The map is less detailed as it only includes community areas, but its construction is easier and doesn’t require longitude and latitude coordinates (which are often missing).
Use geom_sf to make a ggplot. More details at: https://r-spatial.github.io/sf/articles/sf5.html and https://www.r-spatial.org/r/2018/10/25/ggplot2-sf-2.html
For colours I like to use colorspace: https://cran.r-project.org/web/packages/colorspace/vignettes/colorspace.html
p <- ggplot(com_bounds) + geom_sf(aes(fill = crime_summary, text = community)) +
ggtitle("Crime frequency by community area") +
colorspace::scale_fill_continuous_sequential(5, palette = "Inferno") + theme_void() +
guides(fill = guide_colorbar(title = "Frequency")) +
scale_fill_gradientn(colors = colorspace::sequential_hcl(5, palette = "Inferno")[5:1],
breaks = seq(0, 15000, 2500), limits = c(0, 15000))
p
Trying out interactive plots with plotly.
library(plotly)
p %>% ggplotly(tooltip = "text") %>% style(hoverlabel = list(bgcolor = "white"), hoveron = "fill")
Can use google map backgrounds with plotly and ggplot2.
library(ggmap)
register_google(key = "AIzaSyAv2lYIQ4Iw-Qu4bDPFsGB-Rdosvr_U208")
map_centre <- as.data.frame(geocode("chicago"))
basemap <- get_map(location=c(lon = map_centre$lon, lat = map_centre$lat), zoom=11)
p <- ggmap(basemap) + geom_sf(data = com_bounds, aes(fill = crime_summary, text = community, alpha = 0.3), inherit.aes = F) +
ggtitle("Crime frequency by community area") +
colorspace::scale_fill_continuous_sequential(5, palette = "Inferno") + theme_void() +
guides(fill = guide_colorbar(title = "Frequency")) +
scale_fill_gradientn(colors = colorspace::sequential_hcl(5, palette = "Inferno")[5:1],
breaks = seq(0, 15000, 2500), limits = c(0, 15000))
## Warning: Ignoring unknown aesthetics: text
p %>% ggplotly(tooltip = "text") %>% style(hoverlabel = list(bgcolor = "white"), hoveron = "fill")
Or instead can create interactive maps using leaflet package (but as far as I know doesn’t work with plotly).
library(leaflet)
leaflet(com_bounds) %>% addTiles() %>%
addPolygons(color = "#444444", weight = 1, smoothFactor = 0.5,
opacity = 1.0, fillOpacity = 0.4,
fillColor = ~colorNumeric("inferno", crime_summary, reverse = T)(crime_summary),
highlightOptions = highlightOptions(color = "white", weight = 2,
bringToFront = TRUE)) %>% addLegend(pal = colorNumeric("inferno", com_bounds$crime_summary, reverse = T), values = ~com_bounds$crime_summary, title = "Crimes")
## Warning: sf layer has inconsistent datum (+proj=longlat +ellps=WGS84 +no_defs).
## Need '+proj=longlat +datum=WGS84'